library(haven)
library(readr)
library(data.table)
library(scales)
library(dplyr)
library(lfe)
library(stargazer)
cohort <- fread("H:/Zheng_10223/Joint/cohort_2025.csv")

############################

cohort$MainParent_Income_HH_MainParentAge45_49_ventparent=cut(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent, breaks=seq(0,100,5), labels=c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100))



#######################################################
model_refugees=lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent,data=cohort[cohort$ImmigrationCategory=="Refugee",])
stargazer(model_refugees)


model_nonrefugees=lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent,data=cohort[cohort$ImmigrationCategory!="Refugee",])
stargazer(model_nonrefugees)

#### FIGURE 2: BINSCATTER ####
par(mar=c(4.5,4.7,2,2))
plot(cohort[!ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=19, col="black", cex=1.2, xlab="Parent Rank", ylab="Child Rank", ylim=c(40,80), cex.lab=1.5, cex.axis=1.5)
points(cohort[ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=18, col="forestgreen", cex=1.5)
legend(x=60, y=48, pch=c(19,18), col=c("black", "forestgreen"), title="Immigrant Category                              ", cex=1.3, bty="n",
       c(paste("Non-Refugee; IGM=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[!ImmigrationCategory=="Refugee"])$coef[2],3), sep=""),
         paste("Refugee & Humanitarian; IGM=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[ImmigrationCategory=="Refugee"])$coef[2],3), sep="")))
abline(lm(cohort[!ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="black", lwd=2)
abline(lm(cohort[ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="forestgreen", lwd=2)

# with Connolly 
plot(cohort[!ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=19, col="black", cex=1.2, xlab="Parent Rank", ylab="Child Rank", ylim=c(35,80), cex.lab=1.5, cex.axis=1.5)
points(cohort[ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"], pch=18, col="forestgreen", cex=1.5)
legend(x=60, y=45, pch=c(19,18,NA), lty=c(1,1,2), col=c("black", "forestgreen", "grey60"), title="Immigrant Category                            ", cex=1.3, bty="n",
       lwd=c(2,2,2),c(paste("Non-Refugee; IGM=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[!ImmigrationCategory=="Refugee"])$coef[2],3), sep=""),
         paste("Refugee & Humanitarian; IGM=", round(lm(Child_Income_IND_30_34_pct~MainParent_Income_HH_MainParentAge45_49_pctparent, data=cohort[ImmigrationCategory=="Refugee"])$coef[2],3), sep=""),
               "Canadian-Born; IGM=0.286"))
abline(lm(cohort[!ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="black", lwd=2)
abline(lm(cohort[ImmigrationCategory=="Refugee", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_pctparent"][,c(2,1)]), col="forestgreen", lwd=2)
abline(a=36.536, b=0.286, col="grey60", lwd=2, lty=2)

# counts : 
dfrefugees=cohort %>% filter(ImmigrationCategory=="Refugee") %>% group_by(MainParent_Income_HH_MainParentAge45_49_pctparent) %>% summarize(count=n())
write.csv(dfrefugees,"H:/Zheng_10223/ToVet/Supporting/Figure1_refugees.csv")



dfnonrefugees=cohort %>% filter(ImmigrationCategory!="Refugee") %>% group_by(MainParent_Income_HH_MainParentAge45_49_pctparent) %>% summarize(count=n())
write.csv(dfnonrefugees,"H:/Zheng_10223/ToVet/Supporting/Figure1_nonrefugees.csv")
